# -*- coding:utf-8 -*-
# @Time:2024/4/19 21:01
# @Author:miuzg
# @FileName:new test2.py
# @Software:PyCharm

import requests
from lxml import etree


class Driver:
    """Scraper that downloads a question page and prints every question on it.

    The page address and the request headers are stored on the instance;
    ``run`` fetches the page and hands the markup to ``parse_html``, which
    prints each question with its options, image address and answer.
    """

    # Seconds to wait for the server. Without a timeout ``requests.get`` may
    # block indefinitely on an unresponsive host.
    timeout = 10

    def __init__(self):
        self.url = 'https://www.jsyks.com/kmy-mnks'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0'}

    @staticmethod
    def _first(values, default=None):
        """Return the first item of *values*, or *default* when it is empty."""
        return values[0] if values else default

    def get_html(self, url):
        """Download *url* and return the response body as text.

        Args:
            url: Address of the page to fetch.

        Returns:
            The decoded body of the response.

        Raises:
            requests.RequestException: On connection problems, timeouts or
                an HTTP error status.
        """
        # Fetch the address the caller asked for rather than always self.url.
        res = requests.get(url, headers=self.headers, timeout=self.timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        res.raise_for_status()
        return res.text

    def parse_html(self, html):
        """Extract every question from *html* and print it.

        Args:
            html: Markup of the question page. Empty or blank input is
                ignored.

        Returns:
            None. The questions are written to standard output.
        """
        # Nothing to parse; also avoids handing an empty document to lxml.
        if not html or not html.strip():
            return

        tree = etree.HTML(html)
        if tree is None:
            return

        # Every question is an <li> inside the <ul class="Content"> list.
        items = tree.xpath('//ul[@class="Content"]/li')
        for index, li in enumerate(items):
            # Question text; the leading "./" makes the path relative to li.
            question = self._first(li.xpath('./strong/text()'))

            # Texts of the answer options.
            option = li.xpath('./b/text()')

            # The answer is stored in the "k" attribute of the <li>.
            answer = self._first(li.xpath('./@k'))

            # Only some questions carry an image, so this may be None.
            img = self._first(li.xpath('./strong/u/img/@ybsrc'))

            print(f"第{index+1}题：{question}\n选项为{option}\n图片地址为{img}\n答案为{answer}")
            print('-'*30)

    def run(self):
        """Fetch the configured page and print its questions."""
        html = self.get_html(self.url)
        self.parse_html(html)


if __name__ == '__main__':
    # Script entry point: build the scraper and run it once.
    Driver().run()
